# Clears work environment: removes every (non-hidden) object from the global environment
# so nothing left over from a previous session can leak into this run
rm(list = ls())

# Sets working directory; the relative paths used below ('cleanedLSdata.RData',
# 'data zips/data/...', 'newdata.RData') are all resolved against it.
# NOTE(review): machine-specific absolute path -- must be adjusted before running elsewhere.
setwd('C:/Users/Jason/Box Sync/Home Folder jdt34/733 - Maximum Likelihood Estimation/Lai & Slater 2006 replication')

# Loads data and libraries
# NOTE(review): `loadPkg`, `packs`, `lsm`, `LSpredictor` and `JTpredictor` are used by this
# script but not defined in it; presumably they are restored by this load() call -- confirm.
load(file='cleanedLSdata.RData')
# Passes the package names in `packs` plus 'DataCombine' to loadPkg
# (presumably attaches them; DataCombine provides slide(), used for the lagged MID count)
loadPkg(c(packs, 'DataCombine'))

# Constructs [open] variable from World Bank data
# open = (imports + exports) / gdp, one row per country-year with a known COW code.

# Reads one World Bank indicator file and reshapes it from wide (one column per
# year) to long (one row per country-year).
#   fileName  : name of the CSV file inside '<working directory>/data zips/data/'
#   valueName : name to give the column holding the indicator's values
# Returns a data frame with columns name, iso, year, <valueName>, ccode.
readWbIndicator = function(fileName, valueName) {
  wide = read.csv(file=paste0(getwd(), '/data zips/data/', fileName))
  # Keeps the four leading identifier columns and yearly columns 38-51
  # NOTE(review): assumes the usual World Bank layout (country name, country code,
  # indicator name, indicator code, then one column per year) -- confirm.
  wide = wide[, c(1:4, 38:51)]
  # id.vars is explicit: by default melt() guesses id columns from their type, so a
  # year column read as all-NA (logical) would silently be treated as an identifier
  # and shift the positional selection below.
  long = melt(wide, id.vars=1:4)
  # Columns 1-2 are the country name and code, 5-6 the melted year label and value
  long = long[, c(1:2, 5:6)]
  colnames(long) = c('name', 'iso', 'year', valueName)
  # Year labels carry a one-character prefix (e.g. 'X1993'); characters 2-5 are the year
  long$year = as.numeric(substr(as.character(long$year), 2, 5))
  long$ccode = countrycode(long$iso, origin='iso3c', destination='cown')
  long
}

imports = readWbIndicator('imports.csv', 'imports')
exports = readWbIndicator('exports.csv', 'exports')
gdp = readWbIndicator('gdp.csv', 'gdp')

# merge() joins on all shared columns (name, iso, year, ccode)
wbmerge = merge(imports, exports)
wbmerge = merge(wbmerge, gdp)
wbmerge$open = (wbmerge$imports + wbmerge$exports) / wbmerge$gdp

# Drops rows where openness could not be computed or the country has no COW code
wbwhole = wbmerge[!is.na(wbmerge$open) & !is.na(wbmerge$ccode), ]
dfOpen = wbwhole[, c('year', 'ccode', 'open')]

# Constructs [cap] variable from COW National Material Capabilities data
# Keeps year, country code and the cinc score for rows dated 1993 or later.
nmc = read.csv(file=paste0(getwd(), '/data zips/data/capabilities.csv'))
capRows = which(nmc$year >= 1993)   # which() also discards rows whose year is NA
dfCap = nmc[capRows, c('year', 'ccode', 'cinc')]

# Constructs [total] variable from COW Direct Contiguity data
# Keeps year, state number and the `total` column for rows dated 1993 or later,
# then renames the state number to `ccode` so it matches the other data frames' merge key.
dc = read.csv(file=paste0(getwd(), '/data zips/data/borders.csv'))
totalRows = which(dc$year >= 1993)   # which() also discards rows whose year is NA
dfTotal = dc[totalRows, c('year', 'stateno', 'total')]
names(dfTotal)[names(dfTotal) == 'stateno'] = 'ccode'

# Constructs [totalallies] variable from COW Formal Alliances data
# Counts the rows of the alliance file per year/country pair (1993 onward).
fa = read.csv(file=paste0(getwd(), '/data zips/data/allies.csv'))
recentAllies = fa[which(fa$year>=1993), c('year', 'ccode')]
recentAllies$dummy = 1
# year x ccode matrix of row counts; pairs with no rows are NA (tapply's value for
# empty cells), not 0
allyCounts = with(recentAllies, tapply(dummy, list(year, ccode), sum))
# Back to long form: one row per matrix cell
dfTotalallies = melt(allyCounts)
colnames(dfTotalallies) = c('year', 'ccode', 'totalallies')

# Constructs regime type dummies from Banks TSCS and Polity data
# Each country-year is assigned to at most one of five mutually exclusive regime
# types; the result holds one 0/1 column per type and only the rows that could
# be classified.
polity = read.spss(file=paste0(getwd(), '/data zips/data/polity.sav'), to.data.frame=TRUE)
dfPolity = polity[which(polity$year>=1993), c('year', 'ccode', 'polity', 'xconst')]
banks = read.csv(file=paste0(getwd(), '/data zips/data/banks.csv'))
dfBanks = banks[which(banks$year>=1993), c('Wbcode', 'year', 'polit02')]
dfBanks$ccode = countrycode(dfBanks$Wbcode, origin='wb', destination='cown')
dfBanks = dfBanks[, c('year', 'polit02', 'ccode')]
# Joined on the shared year/ccode columns
dfRegimes = merge(dfPolity, dfBanks)

# Building blocks of the classification rules (NA wherever the input is NA)
# NOTE(review): Polity codes interruption/interregnum/transition periods as
# -66/-77/-88; such values satisfy `polity < 6` and `xconst < 3` here -- confirm
# this is intended or handled upstream.
belowDemocracy = dfRegimes$polity < 6
xconstHigh = dfRegimes$xconst >= 3
xconstLow = dfRegimes$xconst < 3
polit02One = dfRegimes$polit02 == 1
polit02Mid = dfRegimes$polit02 > 1 & dfRegimes$polit02 < 4

# One rule per regime type, in output column order; no row can satisfy two rules
regimeRules = list(
  democracy = dfRegimes$polity >= 6,
  machine   = belowDemocracy & xconstHigh & polit02One,
  junta     = belowDemocracy & xconstHigh & polit02Mid,
  boss      = belowDemocracy & xconstLow & polit02One,
  strongman = belowDemocracy & xconstLow & polit02Mid
)
regimeTypes = names(regimeRules)

# Label each row with the rule it satisfies; which() skips rows where the rule
# is NA, so rows that match no rule keep an NA label
regime = rep(NA_character_, nrow(dfRegimes))
for (type in regimeTypes) {
  regime[which(regimeRules[[type]])] = type
}

# One-hot encode the label: 1 for the row's own type, 0 for the others,
# NA in every column for unclassified rows
for (type in regimeTypes) {
  dfRegimes[[type]] = as.numeric(regime == type)
}

# Unclassified rows (and rows missing year or ccode) are dropped here
dfRegimes = na.omit(dfRegimes[, c('year', 'ccode', regimeTypes)])

# Constructs [majorinitone] and [majorinitonelag] from COW Militarized Interstate Disputes data
# majorinitone    : number of rows in the MID file per year/country pair with
#                   StYear >= 1992, HostLev >= 4 and SideA == 1 (0 when there are none)
# majorinitonelag : the same count from the preceding row of that country
mids = read.csv(file=paste0(getwd(), '/data zips/data/MIDs.csv'))
initiations = mids[which(mids$StYear>=1992 & mids$HostLev>=4 & mids$SideA==1), c('StYear', 'ccode')]

# Cross-tabulating gives the complete year x ccode grid with zeros for empty cells.
# as.data.frame() lays it out with year varying fastest, i.e. rows are grouped by
# ccode with years ascending inside each group -- the order slide() relies on below.
midTable = table(year=initiations$StYear, ccode=initiations$ccode)
dfMids = as.data.frame(midTable, responseName='majorinitone', stringsAsFactors=TRUE)
dfMids$majorinitone = as.numeric(dfMids$majorinitone)

# Lag by one row within each country.
# NOTE(review): the grid only contains years that occur in the filtered data, so the
# lag equals "previous calendar year" only if no year is absent -- confirm.
dfMids = slide(dfMids, Var='majorinitone', GroupVar='ccode', slideBy=-1)
colnames(dfMids)[4] = 'majorinitonelag'

# year and ccode are factors at this point; convert via character to recover the numbers
dfMids$year = as.numeric(as.character(dfMids$year))
dfMids$ccode = as.numeric(as.character(dfMids$ccode))

# Combines data frames;   initially:  1881 & 2799 observations
# Each merge() is an inner join on the columns the two inputs share (year, ccode).
# Row counts recorded by the original author after each successive join:
#   dfRegimes 1487, dfTotal 1290, dfCap 1290, dfOpen 1049, dfTotalallies 951
merged = Reduce(merge, list(dfMids, dfRegimes, dfTotal, dfCap, dfOpen, dfTotalallies))

# Reorders the columns (lagged count last) so they line up position by position
# with the first 13 columns of lsm, whose names are then applied
columnOrder = c('ccode', 'year', 'majorinitone',
                'democracy', 'machine', 'junta', 'boss', 'strongman',
                'total', 'cinc', 'open', 'totalallies',
                'majorinitonelag')
newdata = merged[, columnOrder]
colnames(newdata) = colnames(lsm)[1:13]

# Drops rows with any missing value; the original author recorded 860 observations here
newdata = na.omit(newdata)

# Runs models on out-of-sample data
# NOTE(review): LSpredictor and JTpredictor are not defined in this script; presumably
# they are restored from 'cleanedLSdata.RData' -- confirm what each returns.
# Their results are stored as columns `nb` and `zi`. Statement order matters:
# JTpredictor receives newdata after the `nb` column has been added.
newdata$nb = LSpredictor(newdata)
newdata$zi = JTpredictor(newdata)

# Writes the newdata object to 'newdata.RData' (relative to the working directory)
save(file='newdata.RData', newdata)
